Last updated 4 Dec 2020 by Steve Formel
Description: Is there a relationship between diversity of salt marsh soil microbial communities and oil abundance?
library(cowplot)
library(tidyverse)
library(brms)
Here I use a convenient phyloseq function to generate all three metrics.
source("./S2_MGF_load_packages_and_clean_data.R")
p <- plot_richness(fung.2season_with_outliers)
#convert value to Hill numbers
p1 <- p$data
p1$hill_value <- NA
p1$value[which(p1$variable == "Observed")] <- p1$value[which(p1$variable == "Observed")]
p1$value[which(p1$variable == "Shannon")] <- exp(p1$value[which(p1$variable == "Shannon")])
p1$value[which(p1$variable == "Simpson")] <- 1/(1 - p1$value[which(p1$variable == "Simpson")])
p1$hill_order <- NA
p1$hill_order[which(p1$variable == "Observed")] <- 0
p1$hill_order[which(p1$variable == "Shannon")] <- 1
p1$hill_order[which(p1$variable == "Simpson")] <- 2
#rename levels for site
p1$site <- as.factor(p1$site)
levels(p1$site) <- plyr:::revalue(levels(p1$site), c("BJ" = "Heavily Oiled", "F" = "Lightly Oiled"))
#total PAHs----
p1 <- p1 %>%
filter(variable %in% c("Observed", "Shannon", "Simpson")) %>%
droplevels()
p.list <- split.data.frame(x = p1, f = p1$variable)
#also for plotting ultimate figure
p.fung <-p.list
based on https://solomonkurz.netlify.app/post/robust-linear-regression-with-the-robust-student-s-t-distribution/ and https://bayesed-and-confused.netlify.app/post/model-fit-checks/
I look at the models as they’re generated and ultimately create a table of beta and R-squared values.
This is the equivalent of richness.
f0 <- brm(value ~ Total.relevant.PAHs,
data = p.list[[1]],
family = "gaussian",
chains = 4, cores = 4,
seed = 1)
M <- f0
pp_check(object = M, type = "dens_overlay", nsamples = 100)
pp_check(M, type = "stat", stat = 'median', nsamples = 100)
pp_check(M, type = "stat", stat = 'mean', nsamples = 100)
pp_check(M,type = 'intervals')
plot(M)
summ <- as.data.frame(posterior_summary(M))
#####R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))
#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "0"
df.plot$param <- rownames(df.plot)
df.plot.done <- df.plot
This is the equivalent of Shannon Diversity.
f1 <- brm(value ~ Total.relevant.PAHs,
data = p.list[[2]],
family = "skew_normal",
chains = 4, cores = 4,
seed = 1)
M <- f1
pp_check(object = M, type = "dens_overlay", nsamples = 100)
pp_check(M, type = "stat", stat = 'median', nsamples = 100)
pp_check(M, type = "stat", stat = 'mean', nsamples = 100)
pp_check(M,type = 'intervals')
plot(M)
summ <- as.data.frame(posterior_summary(M))
#R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))
#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "1"
df.plot$param <- rownames(df.plot)
df.plot.done <- rbind(df.plot.done, df.plot)
This is the equivalent of Simposon Diversity.
f2 <- brm(value ~ Total.relevant.PAHs,
data = p.list[[3]],
family = "skew_normal",
chains = 4, cores = 4,
seed = 1,
control = list(adapt_delta = 0.99))
M <- f2
pp_check(object = M, type = "dens_overlay", nsamples = 100)
pp_check(M, type = "stat", stat = 'median', nsamples = 100)
pp_check(M, type = "stat", stat = 'mean', nsamples = 100)
pp_check(M,type = 'intervals')
plot(M)
print(M)
## Family: skew_normal
## Links: mu = identity; sigma = identity; alpha = identity
## Formula: value ~ Total.relevant.PAHs
## Data: p.list[[3]] (Number of observations: 82)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 5.94 0.37 5.27 6.74 1.00 1578 1599
## Total.relevant.PAHs -0.00 0.00 -0.01 0.00 1.00 1851 1622
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.61 0.29 3.11 4.25 1.00 1460 1787
## alpha 8.20 2.42 3.99 13.37 1.00 1863 1738
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
summ <- as.data.frame(posterior_summary(M))
#R2
r2 <- as.data.frame(bayes_R2(object = M,resp = NULL,summary = TRUE,robust = FALSE,probs = c(0.025, 0.975)))
#get parameters for plotting
df.plot <- rbind(summ[2,], r2[1,])
df.plot$hill_order <- "2"
df.plot$param <- rownames(df.plot)
df.plot.done <- rbind(df.plot.done, df.plot)
df.plot.done$microbe <- "Fungi"
fungi.plot.done <- df.plot.done
plot.params.df <- fungi.plot.done
plot.params.df
## Estimate Est.Error Q2.5 Q97.5
## b_Total.relevant.PAHs -0.009470966 0.020140572 -4.844743e-02 0.029914825
## R2 0.014453834 0.018587004 1.856109e-05 0.067263744
## b_Total.relevant.PAHs1 -0.003995385 0.006477364 -1.929337e-02 0.005685893
## R21 0.016623170 0.027595167 1.164506e-05 0.101515224
## b_Total.relevant.PAHs2 -0.002493690 0.004200763 -1.264174e-02 0.003404719
## R22 0.015367593 0.026486141 7.171018e-06 0.097598599
## hill_order param microbe
## b_Total.relevant.PAHs 0 b_Total.relevant.PAHs Fungi
## R2 0 R2 Fungi
## b_Total.relevant.PAHs1 1 b_Total.relevant.PAHs Fungi
## R21 1 R2 Fungi
## b_Total.relevant.PAHs2 2 b_Total.relevant.PAHs Fungi
## R22 2 R2 Fungi
write.csv(plot.params.df, "../../../../results/images/manuscript/S2_MGF_final/S2_MGF_oil_div_results.csv")